library(readr)
library(ggplot2)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(stringr)
library(jhur)
Read in the Charm City Circulator dataset:
circ = read_csv("http://johnmuschelli.com/intro_to_r/data/Charm_City_Circulator_Ridership.csv") or circ = read_circulator()
# download the ridership table; readr infers each column's type
circ = read_csv(
  file = "http://johnmuschelli.com/intro_to_r/data/Charm_City_Circulator_Ridership.csv"
)
## Parsed with column specification:
## cols(
## day = col_character(),
## date = col_character(),
## orangeBoardings = col_integer(),
## orangeAlightings = col_integer(),
## orangeAverage = col_double(),
## purpleBoardings = col_integer(),
## purpleAlightings = col_integer(),
## purpleAverage = col_double(),
## greenBoardings = col_integer(),
## greenAlightings = col_integer(),
## greenAverage = col_double(),
## bannerBoardings = col_integer(),
## bannerAlightings = col_integer(),
## bannerAverage = col_double(),
## daily = col_double()
## )
# convert the date column from character to Date (parsed as month/day/year)
circ$date = mdy(circ$date)
# put a "." between the line name and the measurement in each column name
# (e.g. orangeBoardings -> orange.Boardings) so they can be split later
colnames(circ) = str_replace(colnames(circ), "Board", ".Board")
colnames(circ) = str_replace(colnames(circ), "Alight", ".Alight")
colnames(circ) = str_replace(colnames(circ), "Average", ".Average")
# reshape to long format: the old column name goes into `var` and its value
# into `number`; `var` is then split at the "." into the circulator line and
# the measurement type
long = circ %>%
  gather(
    key = "var", value = "number",
    starts_with("orange"), starts_with("purple"),
    starts_with("green"), starts_with("banner")
  ) %>%
  separate(col = var, into = c("line", "type"), sep = "[.]")
or run:
# shortcut offered as an alternative to the reshaping steps above;
# read_circulator_long() is presumably provided by the jhur package loaded at
# the top of the file -- TODO confirm
long = read_circulator_long()
## Parsed with column specification:
## cols(
## day = col_character(),
## date = col_character(),
## orangeBoardings = col_integer(),
## orangeAlightings = col_integer(),
## orangeAverage = col_double(),
## purpleBoardings = col_integer(),
## purpleAlightings = col_integer(),
## purpleAverage = col_double(),
## greenBoardings = col_integer(),
## greenAlightings = col_integer(),
## greenAverage = col_double(),
## bannerBoardings = col_integer(),
## bannerAlightings = col_integer(),
## bannerAverage = col_double(),
## daily = col_double()
## )
## keep only the daily average ridership rows that have a value
avg = long %>%
  filter(type == "Average", !is.na(number))
# widen: one column per measurement type, filled from `number`
type_wide = long %>%
  spread(key = type, value = number)
head(type_wide)
## # A tibble: 6 x 7
## day date daily line Alightings Average Boardings
## <chr> <date> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 Friday 2010-01-15 1644 banner NA NA NA
## 2 Friday 2010-01-15 1644 green NA NA NA
## 3 Friday 2010-01-15 1644 orange 1643 1644 1645
## 4 Friday 2010-01-15 1644 purple NA NA NA
## 5 Friday 2010-01-22 1394. banner NA NA NA
## 6 Friday 2010-01-22 1394. green NA NA NA
In these questions, try to use ggplot2 if possible.
# scatter plot of average ridership against date
qplot(date, number, data = avg)
Color the points by line (orange, purple, green, banner):
qplot(x = date, y = number, data = avg, colour = line)
# keep the line-coloured scatter plot in a variable and draw it
first_plot = qplot(date, number, data = avg, colour = line)
print(first_plot)
# line-coloured points with one black smoother per line
qplot(date, number, data = avg, colour = line) +
  geom_smooth(mapping = aes(group = line), colour = "black")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# colour by day of the week instead of by line
qplot(date, number, data = avg, colour = day)
# smoother added with its default settings on the line-coloured plot
qplot(date, number, data = avg, colour = line) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Color the points by the name of the line (with banner –> blue):
pal = c("blue", "darkgreen","orange","purple")
# line-coloured scatter plot drawn with the custom palette `pal`
qplot(date, number, data = avg, colour = line) +
  scale_colour_manual(values = pal)
Plot average ridership by date, with one panel per line:
qplot(x = date, y = number, data= avg, facets = ~line)
# one panel per line, built with an explicit facet_wrap() layer
qplot(date, number, data = avg) +
  facet_wrap(~line)
# one panel per line, with the points also coloured by line using `pal`
qplot(date, number, data = avg, colour = line, facets = ~line) +
  scale_colour_manual(values = pal)
Plot average ridership by date, with one panel per day of the week, colored by line:
qplot(x = date, y = number, data= avg, facets = ~day,
colour = line) + scale_colour_manual(values=pal)
# ggplot() form: one panel per day of the week, points coloured by line
ggplot(avg, aes(x = date, y = number, colour = line)) +
  geom_point() +
  facet_wrap(~day) +
  scale_colour_manual(values = pal)
Plot average ridership (avg) by date, colored by line (same as 1a). (Do not take an average; use the average column for each route/line.) Make the x-label "Year". Make the y-label "Number of People". Use the black and white theme theme_bw(). Change the text size to (text = element_text(size = 20)) in theme.
first_plot +
xlab("Year") + ylab("Number of People") + theme_bw() +
theme(text = element_text(size = 20))
Plot average ridership on the orange route versus date as a solid line, and add dashed “error” lines based on the boardings and alightings. The line colors should be orange. (Hint: linetype is an aesthetic for lines; see also scale_linetype and scale_linetype_manual.) Alightings = "dashed", Boardings = "dashed", Average = "solid"
orange = long %>% filter(line == "orange")
line type is dashed
# dashed orange line: both settings are fixed values given outside aes()
ggplot(data = orange, mapping = aes(x = date, y = number)) +
  geom_line(colour = "orange", linetype = "dashed")
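Don't do this: inside aes(), "orange" is treated as a data value rather than a colour, so the line is drawn in the default scale colour (with a legend), not in orange.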
# kept as a counter-example: the colour is supplied inside aes()
ggplot(data = orange, mapping = aes(x = date, y = number)) +
  geom_line(mapping = aes(colour = "orange"), linetype = "dashed")
now line dashedness varies by the type
# the line type is mapped to `type`; the colour stays a fixed value
ggplot(data = orange, mapping = aes(x = date, y = number)) +
  geom_line(mapping = aes(linetype = type), colour = "orange")
this one as a quick plot
# quick-plot version of the plot above; qplot() treats its arguments as
# aesthetic mappings, so the fixed colour is wrapped in I() to be used as-is
# (a bare "orange" would be mapped through the default colour scale)
qplot(x = date, y = number, data = orange, geom = "line",
      linetype = type, colour = I("orange"))
# unnamed values are matched to the types in order (Alightings, Average,
# Boardings), so "solid" must come second for Average to be the solid line
ggplot(orange, aes(x = date, y = number)) +
  geom_line(aes(linetype = type), colour = "orange") +
  scale_linetype_manual(values = c("dashed", "solid", "dashed"))
# each value is named after the type it applies to, so the line types do
# not depend on the order in which they are listed
ggplot(data = orange, mapping = aes(x = date, y = number)) +
  geom_line(mapping = aes(linetype = type), colour = "orange") +
  scale_linetype_manual(
    values = c(Average = "solid", Alightings = "dashed", Boardings = "dashed")
  )